package crawler;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class Kernel {
	
	WebList visitedWebs;
	
	public Kernel(String _url) {
		try {
			URL url = new URL(_url);
			Matcher matcher;
			String href;

			BufferedReader inputStream = new BufferedReader(new InputStreamReader(url.openStream()));
			String line;
			Pattern emailPattern = Pattern.compile("[a-zA-Z0-9]+@([a-zA-Z0-9]+\\.)+([a-zA-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)");
			Pattern hrefPattern = Pattern.compile("href=\"(\\S+)\"");
			Pattern externalPattern = Pattern.compile("\\.([a-zA-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)(\\s|\\.|/)");
			while ((line = inputStream.readLine()) != null) {
				matcher = emailPattern.matcher(line);
				while (matcher.find()) {
					System.out.println(matcher.group());
				}
			*/	
				//System.out.println(line);
				matcher = hrefPattern.matcher(line);
				while (matcher.find()) {
					System.out.println("Encontramos algo");
					href = matcher.group();
					System.out.println(href);
					
					Matcher externalMatcher;
					externalMatcher = externalPattern.matcher(href);
					if (externalMatcher.find()) System.out.println("Enlace externo");
				}
				

			}
			inputStream.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	
}
